# coding: utf-8
import re, datetime

# Sample page-title markup: a name followed by a six-digit code in
# parentheses inside an <h1>.
s = u'<div id="maintit"><h1>上海电力(600021)基本资料</h1>'

# Regex source for one shareholder table row.  Groups:
#   1: text inside the "tishi" div      2: class of the following cell
#   3: digits in that cell              4: class of the next cell
#   5: text before the '%' sign in that cell
# Backslashes are doubled instead of relying on unknown escapes such as
# '\S' surviving in a non-raw literal (newer Python 3 releases warn about
# them).  A raw literal is not used because ur'' is a syntax error on
# Python 3, and the u prefix keeps the pattern a unicode string on Python 2.
item = (
    u'<div class="tishi">(\\S+)</div></td>'
    u'<td class="(solidborder|bgcolor)">(\\d+)</td>'
    u'<td class="(solidborder|bgcolor)">(\\S+)%</td><td'
)

# Sample row whose cell classes ("solidbr"/"solidbo") are not in the
# alternation above, so item_re does not match it.
s3 = u'<div class="tishi">中国电力投资集团公司</div></td><td class="solidbr">916646315</td><td class="solidbo">42.839%</td><td>流通A股</td>'

# Sample row with "bgcolor" cells; item_re matches it.
s4 = u'<div class="tishi">中国电力国际发展有限</div></td><td class="bgcolor">403465793</td><td class="bgcolor">18.855%</td><td class="lastbgcolor">流通A股'

# Disabled experiment: pull the name and the code out of the title in `s`.
#m = re.search(u'.+>(\S+)\((\d+)\).+</h1>', s)
#print(m.group(1))
#print(m.group(2))

# Captures the two date strings that follow the "截止日期：" and
# "公告日期：" labels, up to the next closing tag.
stock_date_re = re.compile(u'截止日期：(\\S+)\\s+公告日期：(\\S+)</.*')
item_re = re.compile(item)

# Sample page fragment: a report-date / announcement-date line followed by
# a shareholder table whose header columns read No., holder name, shares
# held, holding ratio and share type.  The commented-out findall below
# runs item_re over it.
# The trailing backslashes are line continuations inside the literal, so
# the resulting string contains no newline characters.
# NOTE(review): row 9 carries only its index and percentage cells (there
# is no name or share-count cell) -- confirm whether the sample was
# truncated by accident.
s2 = u'截止日期：2011年03月31日 公告日期：2011年04月22日</div><div class="blueborder table2">\
<table cellspacing="0" cellpadding="0" border="0" width="788"><tbody><tr align="center">\
<td class="headbgcolor"><strong>编号</strong></td><td class="headbgcolor"><strong>股东名称</strong>\
</td><td class="headbgcolor"><strong>持股数(股)</strong></td><td class="headbgcolor"><strong>持股比例\
</strong></td><td class="headlastbgcolor"><strong>股份性质</strong></td></tr><tr align="center">\
<td class="solidborder">1</td><td class="solidborder"><div class="tishi">中国电力投资集团公司</div>\
</td><td class="solidborder">916646315</td><td class="solidborder">42.839%</td><td>流通A股</td>\
</tr><tr align="center"><td class="bgcolor">2</td><td class="bgcolor"><div class="tishi">\
中国电力国际发展有限公司</div></td><td class="bgcolor">403465793</td><td class="bgcolor">\
18.855%</td><td class="lastbgcolor">流通A股</td></tr><tr align="center"><td class="solidborder">3\
</td><td class="solidborder"><div class="tishi">中国长江电力股份有限公司</div></td><td class="solidborder">\
203371562</td><td class="solidborder">9.504%</td><td>流通A股</td></tr><tr align="center">\
<td class="bgcolor">4</td><td class="bgcolor"><div class="tishi">唐建华</div></td><td class="bgcolor">\
15971479</td><td class="bgcolor">0.746%</td><td class="lastbgcolor">流通A股</td></tr><tr align="center">\
<td class="solidborder">5</td><td class="solidborder"><div class="tishi">上海裕盛投资管理有限公司</div></td>\
<td class="solidborder">8661516</td><td class="solidborder">0.404%</td><td>流通A股</td></tr><tr align="center">\
<td class="bgcolor">6</td><td class="bgcolor"><div class="tishi">曹园</div></td><td class="bgcolor">4968382</td>\
<td class="bgcolor">0.232%</td><td class="lastbgcolor">流通A股</td></tr><tr align="center">\
<td class="solidborder">7</td><td class="solidborder"><div class="tishi">张见花</div></td>\
<td class="solidborder">4608200</td><td class="solidborder">0.215%</td><td>流通A股</td></tr>\
<tr align="center"><td class="bgcolor">8</td><td class="bgcolor"><div class="tishi">\
光大永明人寿保险有限公司</div></td><td class="bgcolor">3499900</td><td class="bgcolor">\
0.163%</td><td class="lastbgcolor">流通A股</td></tr><tr align="center"><td class="solidborder">9\
<td class="solidborder">0.158%</td><td>流通A股</td></tr><tr align="center"><td class="bgcolor">10</td>\
<td class="bgcolor"><div class="tishi">缪春敏</div></td><td class="bgcolor">3157090</td><td class="bgcolor">\
0.147%</td><td class="lastbgcolor">流通A股</td></tr></tbody></table></div></li><li style="display:none">'


#ls = item_re.findall(s2)
#for i in ls:
#    print i[4]

# Sample date string in the "<year>年<month>月<day>日" form.
quarter_date_s = u'2011年03月31日'
# Groups: (1) four-digit year, (2) two-digit month, (3) two-digit day.
# Backslashes are doubled rather than relying on the unknown escape '\d'
# being preserved in a non-raw literal; the u prefix is kept because the
# pattern contains non-ASCII characters and must stay unicode on Python 2.
quarter_date_re = re.compile(u'(\\d{4})年(\\d{2})月(\\d{2})日')
# Match object for the sample above; groups hold the strings, not ints.
quarter_date_g = quarter_date_re.search(quarter_date_s)
#print(quarter_date_g.group(1))

# Disabled experiment: turn the three captured groups into a date object.
#d = datetime.date(int(quarter_date_g.group(1)), int(quarter_date_g.group(2)), int(quarter_date_g.group(3)))
#print(d)


# Sample of a JavaScript-style nested array: each inner array starts with a
# quoted six-digit code and a quoted name, followed by numeric fields.
stock_item_s = u"[ ['000001','深发展Ａ',18.20,-2.26,18.62,18.60,18.60,17.91,500495.93,911382614,1.61,3.71,1.12], \
['000002','万 科Ａ',8.48,2.05,8.31,8.32,8.51,8.31,382426.56,322176220,0.40,2.41,0.88], \
['000004','ST国农',12.55,1.46,12.37,12.35,12.67,12.30,6097.33,7635055,0.73,2.99,0.44],"
# Groups: (1) the six-digit code, (2) the name.  The trailing ",\d" anchors
# the end of the name at the first numeric field, which is why the name
# group is "\D+" (one or more non-digits).
# Backslashes are doubled instead of relying on unknown escapes ('\[',
# '\d', '\D') surviving in a non-raw literal; the pattern value is the
# same as before.
stock_item_re = re.compile(u"\\['(\\d{6})','(\\D+)',\\d")
# List of (code, name) tuples, one per inner array in the sample.
stock_item_g = stock_item_re.findall(stock_item_s)
#for i in stock_item_g:
    #print i[1]

    
# Sample table-row fragment: a period label cell, two numeric cells with
# thousands separators, and a signed percentage inside a <font> tag.
holder_count_s =u'<td style="background-image:none;">11年第1季</td>\
<td>27,010</td><td class="black_5_de">8,621.41</td>\
<td><font color="red">-100.09%</font></td>'

# Groups: (1) period label, (2) first number (digits and commas),
# (3) second number (digits, commas, dot), (4) percentage text including
# its sign and '%'.
# The pattern is ASCII-only, so raw literals are used: the regex escapes
# are written once instead of mixing unknown escapes ('\S', '\d') with
# doubled ones ('\\%').  Adjacent literals replace the backslash-newline
# continuations; the compiled pattern text is unchanged.
holder_count_re = re.compile(
    r'<td style="background-image:none;">(\S+)</td>'
    r'<td>([\d,]+)</td><td \S+>([\d,.]+)</td>'
    r'<td><font \S+>([\d.\%-]+)</font>'
)
# List with one 4-tuple of strings per matching row in the sample.
holder_count_g = holder_count_re.findall(holder_count_s)

#for i in holder_count_g:
    #print i[0]
    
# Strip thousands separators and percent signs from a numeric string.
# NOTE: this rebinds the module-level name `s` used earlier for the title
# sample.
s = '-333,33%%'
# Character class matches a comma or a (needlessly escaped) percent sign.
s=re.sub('[,\\%]','',s)

# Function-call form: a single parenthesised argument prints identically
# under the Python 2 print statement and the Python 3 print function,
# whereas the bare `print s` statement is a SyntaxError on Python 3.
print(s)


